This document will show you how to create contingency tables, maps, and other visualizations using the 2009 National Household Travel Survey public-use dataset. Understanding this demo may allow you to import other weighted datsets for analysis.
# install.packages("data.table")
# install.packages("ggplot2")
# install.packages("htmlTable")
# install.packages("magrittr")
# install.packages("geojsonio")
# install.packages("devtools")
# devtools::install_github('davidgohel/ggiraph')
# import dependencies
library(data.table)
library(ggplot2)
library(devtools)
library(ggiraph)
library(geojsonio)
library(htmlTable)
library(magrittr)
# import Summarizer functions
source('make_table.R')
source('make_bar_chart.R')
source('make_html_table.R')
source('cross_tab_table.R')
source('util.R')
The first step is to read in the data. Due to the size of the csvs, we will read in only necessary columns using data.table’s fread function. We will then merge the weights with the survey data using unique identifiers.
# Read variable metadata table
variables <- fread("./data/2009/variables.csv")
# Read variable value lookup table
labels <- fread("./data/2009/labels.csv")
# There are 100 replicate weight names. Use get_wgt_names to create the vector of names.
wgt_names <- get_wgt_names('DAYWGT') #Using trip weights in this example.
# Read in the datasets containing the variables/keys necessary to perform analysis
per50wt <- fread(
input = './data/2009/per50wt.csv',
select = c('HOUSEID','PERSONID',wgt_names),
showProgress = F,
key = c('HOUSEID','PERSONID')
)
DAYV2PUB <- fread(
input = './data/2009/DAYV2PUB.csv',
select = c('HOUSEID','PERSONID','FLAG100','HHSIZE','WRKCOUNT','HHVEHCNT','HHSTATE','HHFAMINC','VMT_MILE','TRAVDAY','TRPTRANS','TRIPPURP','STRTTIME','WHYTRP1S'),
showProgress = F,
key = c('HOUSEID','PERSONID')
)
# merge the datasets
dt <- DAYV2PUB[per50wt]
# manual garbage collection calls can help free memory
invisible(gc())
We will use the make_table function to create weighted aggregate tables. Frequency, Sum, and Average are currently supported.
ex1 <- make_table(
data = dt,
agg = 'count',
factors = 'TRPTRANS',
wgt_name = 'DAYWGT'
)
ex1 %>%
use_labels(labels) %>%
cross_tab_table(round_digits = 3) %>%
make_html_table
| DAYWGT | se | |
|---|---|---|
| TRPTRANS | ||
| Airplane | 3.11E+08 | 2.79E+07 |
| Amtrak/inter city train | 9.53E+07 | 2.06E+07 |
| Appropriate skip | 8.98E+07 | 2.39E+07 |
| Bicycle | 4.08E+09 | 1.81E+08 |
| Car | 1.74E+11 | 1.57E+09 |
| Charter/tour bus | 2.41E+08 | 4.74E+07 |
| City to city bus | 5.99E+07 | 5.39E+07 |
| Commuter bus | 6.66E+08 | 8.61E+07 |
| Commuter train | 5.61E+08 | 6.66E+07 |
| Dont know | 4.36E+08 | 9.06E+07 |
| Ferry | 3.99E+07 | 9.65E+06 |
| Light electric veh (golf cart) | 1.83E+08 | 4.82E+07 |
| Local public bus | 4.69E+09 | 2.13E+08 |
| Motorcycle | 1.05E+09 | 8.70E+07 |
| Not ascertained | 3.08E+07 | 1.52E+07 |
| Other | 2.31E+09 | 1.67E+08 |
| Other truck | 1.91E+09 | 2.82E+08 |
| Pickup truck | 4.04E+10 | 7.49E+08 |
| Refused | 1.73E+08 | 3.17E+07 |
| RV | 1.03E+08 | 3.66E+07 |
| School bus | 6.71E+09 | 2.24E+08 |
| Shuttle bus | 6.51E+08 | 1.07E+08 |
| Special transit-people w/disabilities | 2.74E+08 | 3.70E+07 |
| Street car/trolley | 6.81E+07 | 1.64E+07 |
| Subway/elevated train | 1.54E+09 | 1.26E+08 |
| SUV | 6.93E+10 | 1.14E+09 |
| Taxicab | 7.38E+08 | 7.78E+07 |
| Van | 4.06E+10 | 8.95E+08 |
| Walk | 4.10E+10 | 7.53E+08 |
dt[, WRKCOUNT := as.character(WRKCOUNT)][as.numeric(WRKCOUNT) >= 4, WRKCOUNT := '4+']
ex2 <- make_table(
data = dt,
agg = 'avg',
agg_var = 'VMT_MILE',
factors = 'WHYTRP1S',
wgt_name = 'DAYWGT',
subset = 'VMT_MILE >= 0 & WHYTRP1S >= 0',
variance = 'moe' #Option to get Margin of Error, instead of default Standard Error
)
ex2 %>%
use_labels(labels) %>%
make_bar_chart
# Put a ceiling on HHSIZE and WRKCOUNT variables
dt[, HHSIZE := as.character(HHSIZE)][as.numeric(HHSIZE) >= 4, HHSIZE := '4+']
dt[, WRKCOUNT := as.character(WRKCOUNT)][as.numeric(WRKCOUNT) >= 4, WRKCOUNT := '4+']
ex3 <- make_table(
data = dt,
agg = 'person_trip_rate',
factors = c('TRAVDAY','HHSIZE','WRKCOUNT'),
wgt_name = 'DAYWGT',
subset = 'FLAG100 == 1'
)
# Create formatted "report-type" html table
ex3 %>%
use_labels(labels) %>%
cross_tab_table(round_digits = 3) %>%
make_html_table
| 1 | 2 | 3 | 4+ | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|
|
TRAVDAY by WRKCOUNT |
DAYWGT | se | DAYWGT | se | DAYWGT | se | DAYWGT | se | |||
| Friday | |||||||||||
| 0 | 4.467 | 0.141 | 4.335 | 0.102 | 4.007 | 0.3 | 3.676 | 0.195 | |||
| 1 | 4.982 | 0.164 | 4.701 | 0.084 | 4.51 | 0.163 | 4.546 | 0.181 | |||
| 2 | 0 | 0 | 4.803 | 0.11 | 4.588 | 0.12 | 4.939 | 0.13 | |||
| 3 | 0 | 0 | 0 | 0 | 4.345 | 0.196 | 4.566 | 0.14 | |||
| 4+ | 0 | 0 | 0 | 0 | 0 | 0 | 4.736 | 0.323 | |||
| Monday | |||||||||||
| 0 | 4.073 | 0.142 | 4.235 | 0.091 | 4.367 | 0.313 | 3.944 | 0.157 | |||
| 1 | 4.652 | 0.166 | 4.414 | 0.136 | 4.055 | 0.128 | 4.458 | 0.148 | |||
| 2 | 0 | 0 | 4.477 | 0.119 | 4.156 | 0.127 | 4.096 | 0.103 | |||
| 3 | 0 | 0 | 0 | 0 | 4.134 | 0.199 | 4.223 | 0.175 | |||
| 4+ | 0 | 0 | 0 | 0 | 0 | 0 | 4.492 | 0.273 | |||
| Saturday | |||||||||||
| 0 | 3.952 | 0.153 | 4.126 | 0.095 | 4.068 | 0.282 | 3.782 | 0.32 | |||
| 1 | 5.117 | 0.172 | 4.711 | 0.144 | 4.548 | 0.259 | 4.379 | 0.185 | |||
| 2 | 0 | 0 | 4.841 | 0.128 | 4.654 | 0.156 | 4.619 | 0.145 | |||
| 3 | 0 | 0 | 0 | 0 | 4.062 | 0.197 | 4.377 | 0.226 | |||
| 4+ | 0 | 0 | 0 | 0 | 0 | 0 | 4.443 | 0.238 | |||
| Sunday | |||||||||||
| 0 | 3.774 | 0.124 | 3.872 | 0.136 | 3.162 | 0.291 | 3.357 | 0.304 | |||
| 1 | 4.492 | 0.122 | 4.006 | 0.144 | 3.875 | 0.166 | 4.194 | 0.134 | |||
| 2 | 0 | 0 | 4.222 | 0.107 | 3.976 | 0.139 | 3.825 | 0.118 | |||
| 3 | 0 | 0 | 0 | 0 | 4.105 | 0.187 | 4.175 | 0.301 | |||
| 4+ | 0 | 0 | 0 | 0 | 0 | 0 | 3.909 | 0.203 | |||
| Thursday | |||||||||||
| 0 | 4.344 | 0.224 | 4.331 | 0.124 | 4.117 | 0.592 | 3.761 | 0.223 | |||
| 1 | 4.611 | 0.16 | 4.429 | 0.114 | 4.261 | 0.167 | 4.677 | 0.164 | |||
| 2 | 0 | 0 | 4.388 | 0.121 | 4.485 | 0.146 | 4.297 | 0.101 | |||
| 3 | 0 | 0 | 0 | 0 | 3.932 | 0.241 | 3.965 | 0.117 | |||
| 4+ | 0 | 0 | 0 | 0 | 0 | 0 | 4.039 | 0.145 | |||
| Tuesday | |||||||||||
| 0 | 4.375 | 0.154 | 4.425 | 0.138 | 5.031 | 0.243 | 4.204 | 0.299 | |||
| 1 | 4.472 | 0.165 | 4.615 | 0.111 | 4.265 | 0.096 | 4.732 | 0.161 | |||
| 2 | 0 | 0 | 4.361 | 0.136 | 4.423 | 0.111 | 4.401 | 0.122 | |||
| 3 | 0 | 0 | 0 | 0 | 4.408 | 0.16 | 4.273 | 0.141 | |||
| 4+ | 0 | 0 | 0 | 0 | 0 | 0 | 3.897 | 0.18 | |||
| Wednesday | |||||||||||
| 0 | 4.493 | 0.156 | 4.495 | 0.105 | 3.972 | 0.217 | 3.789 | 0.243 | |||
| 1 | 4.555 | 0.124 | 4.282 | 0.125 | 4.171 | 0.156 | 4.277 | 0.12 | |||
| 2 | 0 | 0 | 4.436 | 0.097 | 4.565 | 0.136 | 4.322 | 0.111 | |||
| 3 | 0 | 0 | 0 | 0 | 4.024 | 0.28 | 4.14 | 0.151 | |||
| 4+ | 0 | 0 | 0 | 0 | 0 | 0 | 4.572 | 0.306 | |||
source('load_map_layer.R')
tbl1 <- make_table(
data = dt,
agg = 'person_trip_rate',
factors = 'HHSTATE',
wgt_name = 'DAYWGT',
subset = 'FLAG100 == 1 & WHYTRP1S >= 0'
)
tbl2 <- make_table(
data = dt,
agg = 'person_trip_rate',
factors = c('HHSTATE','WHYTRP1S'),
wgt_name = 'DAYWGT',
subset = 'FLAG100 == 1 & WHYTRP1S >= 0'
)
tbl2 <- use_labels(tbl2, labels, keep = 'WHYTRP1S')
#list of attributes to copy for each factor split
attr_names <- c('response','factors','variance','factors_label','response_labels')
#split the data.table by HHSTATE and create svg for plot of each state
gg_html <- sapply(with(tbl2, split(tbl2, HHSTATE)), function(tbl) {
for(i in attr_names) setattr(tbl, i, attr(tbl2, i))
g <- make_bar_chart(tbl, interactive = F)
gsub("'", "\"", g$x$html) #remove single quotes from html string
})
#Create column of state names for merging
gg_html <- cbind(STUSPS = attr(gg_html, "names"), tooltip = unlist(gg_html))
#load geom_state_layer
load_map_layer('state_layer')
#Merge tbl1 with state_layer and then merge the tooltip
merged <- merge(x = state_layer, y = tbl1, by.x = 'STUSPS', by.y = 'HHSTATE')
merged <- merge(merged, gg_html, by = 'STUSPS')
geom_state_layer <- geom_polygon_interactive(
data = merged,
mapping = aes(
x = long,
y = lat,
group = group,
tooltip = paste0(NAME,'\n',tooltip),
fill = DAYWGT,
use_jquery = TRUE,
data_id = STATEFP,
hover_css = "fill:orange"),
color = "#FFFFFF",
size = 0.35)
#Create map
map <- ggplot() +
geom_state_layer +
coord_fixed() +
theme_void() +
scale_fill_gradient(low = "#f2f0f7", high = "#54278f")
tooltip_css <- "background-color:#F2F2F2;padding:10px;border-radius:10px 20px 10px 20px;"
map_widget <- ggiraph(code = {print(map)}, zoom_max = 12, tooltip_extra_css = tooltip_css)
map_widget